{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is ‘last_expr’\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('/data/home/marmot/camtrap/PyCharm/CameraTraps-benchmark')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from collections import OrderedDict\n",
    "from copy import deepcopy\n",
    "import os\n",
    "\n",
    "from data_management.cct_json_utils import CameraTrapJsonUtils, IndexedJsonDb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Split each database by location\n",
    "\n",
    "Each database is split according to the location lists in the publicly released splits files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def open_json(path):\n",
    "    with open(path) as f:\n",
    "        j = json.load(f)\n",
    "    return j"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_split_info(splits_path):\n",
    "    \"\"\"Read a splits file and return its train/val entries as sets.\n",
    "\n",
    "    The JSON at `splits_path` must hold a top-level 'splits' object with\n",
    "    'train' and 'val' entries. The size of each set is printed, train first.\n",
    "\n",
    "    Returns:\n",
    "        dict with keys 'train' and 'val', each mapping to a set.\n",
    "    \"\"\"\n",
    "    raw_splits = open_json(splits_path)['splits']\n",
    "    # build both sets before printing anything\n",
    "    split_info = {name: set(raw_splits[name]) for name in ('train', 'val')}\n",
    "    for name in ('train', 'val'):\n",
    "        print(len(split_info[name]))\n",
    "    return split_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_by_location(split_info, db_json_path, out_path_template):\n",
    "    \"\"\"Write one JSON database per split, keeping only that split's locations.\n",
    "\n",
    "    Args:\n",
    "        split_info: dict mapping split name to a list or set of locations in that split.\n",
    "        db_json_path: path to a CCT formatted json.\n",
    "        out_path_template: output path containing a {} placeholder, which is\n",
    "            filled with the split name to produce each output file path.\n",
    "    \"\"\"\n",
    "    full_db = open_json(db_json_path)\n",
    "\n",
    "    for name in split_info:\n",
    "        print('processing split {}'.format(name))\n",
    "        subset = CameraTrapJsonUtils.get_entries_from_locations(full_db, split_info[name])\n",
    "\n",
    "        # 'info' and 'categories' are carried over from the full database unchanged\n",
    "        subset['info'] = full_db['info']\n",
    "        subset['categories'] = full_db['categories']\n",
    "\n",
    "        subset = CameraTrapJsonUtils.order_db_keys(subset)\n",
    "\n",
    "        with open(out_path_template.format(name), 'w') as out_file:\n",
    "            json.dump(subset, out_file, indent=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CCT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "cct_images_path = '/beaver_disk/camtrap/caltech/original/caltech_images_20190919.json'\n",
    "cct_boxes_path = '/beaver_disk/camtrap/caltech/original/caltech_bboxes_20190904.json'\n",
    "cct_boxes_multiclass_path = '/beaver_disk/camtrap/caltech/original/caltech_bboxes_multiclass_20190922.json'\n",
    "\n",
    "cct_splits_path = '/beaver_disk/camtrap/caltech/original/CaltechCameraTrapsSplits_v0.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100\n",
      "40\n"
     ]
    }
   ],
   "source": [
    "cct_split_info = get_split_info(cct_splits_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "cct_out_dir = '/beaver_disk/camtrap/caltech/benchmark'\n",
    "\n",
    "cct_images_out = os.path.join(cct_out_dir, 'caltech_images_20190919_{}.json')\n",
    "cct_boxes_out = os.path.join(cct_out_dir, 'caltech_bboxes_20190904_{}.json')\n",
    "cct_boxes_multiclass_out = os.path.join(cct_out_dir, 'caltech_bboxes_multiclass_20190922_{}.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 243100 image and 245118 annotation entries.\n",
      "New DB has 62410 image and 63274 annotation entries.\n",
      "processing split train\n",
      "Original DB has 243100 image and 245118 annotation entries.\n",
      "New DB has 180690 image and 181844 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(cct_split_info, cct_images_path, cct_images_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 63102 image and 66406 annotation entries.\n",
      "New DB has 25177 image and 26537 annotation entries.\n",
      "processing split train\n",
      "Original DB has 63102 image and 66406 annotation entries.\n",
      "New DB has 37925 image and 39869 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(cct_split_info, cct_boxes_path, cct_boxes_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 63025 image and 65112 annotation entries.\n",
      "New DB has 25108 image and 25751 annotation entries.\n",
      "processing split train\n",
      "Original DB has 63025 image and 65112 annotation entries.\n",
      "New DB has 37917 image and 39361 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(cct_split_info, cct_boxes_multiclass_path, cct_boxes_multiclass_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SS Season 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss1_images_path = '/beaver_disk/camtrap/ss_season1/original/ss_season1_images_20190919.json'\n",
    "ss1_boxes_path = '/beaver_disk/camtrap/ss_season1/original/ss_season1_bboxes_20190923.json'\n",
    "ss1_boxes_multiclass_path = '/beaver_disk/camtrap/ss_season1/original/ss_season1_bboxes_multiclass_20190923.json'\n",
    "\n",
    "ss1_splits_path = '/beaver_disk/camtrap/ss_season1/original/SnapshotSerengetiSplits_v0.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "179\n",
      "46\n"
     ]
    }
   ],
   "source": [
    "ss1_split_info = get_split_info(ss1_splits_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss1_out_dir = '/beaver_disk/camtrap/ss_season1/benchmark'\n",
    "\n",
    "ss1_images_out = os.path.join(ss1_out_dir, 'ss_season1_images_20190919_{}.json')\n",
    "ss1_boxes_out = os.path.join(ss1_out_dir, 'ss_season1_bboxes_20190923_{}.json')\n",
    "ss1_boxes_multiclass_out = os.path.join(ss1_out_dir, 'ss_season1_bboxes_multiclass_20190923_{}.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 411414 image and 412858 annotation entries.\n",
      "New DB has 96823 image and 97253 annotation entries.\n",
      "processing split train\n",
      "Original DB has 411414 image and 412858 annotation entries.\n",
      "New DB has 314591 image and 315605 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(ss1_split_info, ss1_images_path, ss1_images_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 82938 image and 146359 annotation entries.\n",
      "New DB has 18532 image and 31522 annotation entries.\n",
      "processing split train\n",
      "Original DB has 82938 image and 146359 annotation entries.\n",
      "New DB has 64406 image and 114837 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(ss1_split_info, ss1_boxes_path, ss1_boxes_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing split val\n",
      "Original DB has 82611 image and 18152 annotation entries.\n",
      "New DB has 18417 image and 4037 annotation entries.\n",
      "processing split train\n",
      "Original DB has 82611 image and 18152 annotation entries.\n",
      "New DB has 64194 image and 14115 annotation entries.\n"
     ]
    }
   ],
   "source": [
    "split_by_location(ss1_split_info, ss1_boxes_multiclass_path, ss1_boxes_multiclass_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
